Looks like this is already supported upstream (the allocs and zeros, that is) - cherry-picking this commit onto HF's Candle gives us:

```diff
$ git diff origin/main
diff --git c/candle-core/src/cuda_backend/device.rs w/candle-core/src/cuda_backend/device.rs
index a8a43121..2ac9eb6b 100644
--- c/candle-core/src/cuda_backend/device.rs
+++ w/candle-core/src/cuda_backend/device.rs
@@ -1,4 +1,5 @@
-use crate::backend::BackendDevice;
+use super::{CudaError, CudaStorage, CudaStorageSlice, WrapErr};
+use crate::backend::{BackendDevice, BackendStorage};
 use crate::{CpuStorage, CpuStorageRef, DType, Layout, Result, Shape};
 pub use candle_kernels as kernels;
 pub use cudarc;
@@ -8,8 +9,6 @@ use half::{bf16, f16};
 use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 
-use super::{CudaError, CudaStorage, CudaStorageSlice, WrapErr};
-
 /// Unique identifier for cuda devices.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct DeviceId(usize);
@@ -346,6 +345,12 @@ impl BackendDevice for CudaDevice {
             DType::F8E4M3 => {
                 let data = self.alloc_zeros::<F8E4M3>(elem_count)?;
                 CudaStorageSlice::F8E4M3(data)
+                // return Err(CudaError::InternalError("F8E4M3 not supported in CUDA backend").into())
+            }
+            DType::F6E2M3 | DType::F6E3M2 | DType::F4 | DType::F8E8M0 => {
+                return Err(
+                    CudaError::InternalError("Dummy types not supported in CUDA backend").into(),
+                )
             }
         };
         Ok(CudaStorage {
@@ -465,6 +470,12 @@ impl BackendDevice for CudaDevice {
             DType::F8E4M3 => {
                 let data = self.alloc::<F8E4M3>(elem_count)?;
                 CudaStorageSlice::F8E4M3(data)
+                // return Err(CudaError::InternalError("F8E4M3 not supported in CUDA backend").into())
+            }
+            DType::F6E2M3 | DType::F6E3M2 | DType::F4 | DType::F8E8M0 => {
+                return Err(
+                    CudaError::InternalError("Dummy types not supported in CUDA backend").into(),
+                )
             }
         };
```
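For reference, here's a minimal (untested) sketch of what this enables at the tensor API level, assuming a Candle fork that exposes `DType::F8E4M3` plus the dummy dtypes (`F6E2M3`, `F6E3M2`, `F4`, `F8E8M0`) from the diff above:

```rust
use candle_core::{DType, Device, Result, Tensor};

fn main() -> Result<()> {
    let device = Device::new_cuda(0)?;

    // Zero-initialized FP8 storage now allocates directly on the GPU,
    // going through the alloc_zeros::<F8E4M3> arm added above.
    let fp8_zeros = Tensor::zeros((4, 8), DType::F8E4M3, &device)?;
    println!("{:?} {:?}", fp8_zeros.shape(), fp8_zeros.dtype());

    // The dummy dtypes still fail with an InternalError, since their
    // match arm returns early instead of allocating.
    assert!(Tensor::zeros((4, 8), DType::F6E2M3, &device).is_err());
    Ok(())
}
```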
Alloc and zeros implementations for FP8; these are necessary if we want to create the empty FP8 KV cache in mistral.rs.
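Roughly, that use case looks like this (a hypothetical sketch of pre-allocating the cache, not the actual mistral.rs code; names and shapes are illustrative):

```rust
use candle_core::{DType, Device, Result, Tensor};

// Pre-allocate empty K and V cache tensors in FP8. Both allocations
// hit the new alloc_zeros::<F8E4M3> path in the CUDA backend.
fn empty_fp8_kv_cache(
    num_heads: usize,
    max_seq_len: usize,
    head_dim: usize,
    device: &Device,
) -> Result<(Tensor, Tensor)> {
    let shape = (1, num_heads, max_seq_len, head_dim);
    let k = Tensor::zeros(shape, DType::F8E4M3, device)?;
    let v = Tensor::zeros(shape, DType::F8E4M3, device)?;
    Ok((k, v))
}
```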